July 30, 2015

Data aquisition

Following tracks (each containing averaged signal for 2 replicates) are used:

  • HTZ1^JA00001__IL1^F^N2^L3_IL010_and_IL2^F^N2^L3_IL009__NORM^linear^1bp_averaged.bw
  • H3K4me3^ab8580__rc03^F^Cfp1-GFP^L3_RC007_and_rc04^F^Cfp1-GFP^L3_RC008__NORM^linear^1bp_averaged.bw
  • H3K36me3^HK00001__PK07^F^N2^L3_PK007_and_PK06^F^N2^L3_PK006__NORM^linear^1bp_averaged.bw

Loading data and libraries

require(seqplots)
require(magrittr)
devtools::load_all("/Users/przemol/code/seqplotsR")

tracks <- dir('tracks', pattern = 'bw', full.names = TRUE)
features <- dir('features', pattern = 'bed', full.names = TRUE)

top_limits <- list(c(-0.3, 2.5), c(0, 6.5), c(-0.3, 3.5), c(6, 12))
colvec <- list(
    NA, 
    c('blue', 'white', 'red'), 
    c('blue', 'white', 'red'), 
    c('white', '#66ccff', '#000066')
)

tracks
## [1] "tracks/H2A.Z.bw"    "tracks/H3K36me3.bw" "tracks/H3K4me3.bw"
features
## [1] "features/TSS_bottom_20pct.bed" "features/TSS_fourth_20pct.bed"
## [3] "features/TSS_second_20pct.bed" "features/TSS_third_20pct.bed" 
## [5] "features/TSS_top_20pct.bed"

Data process

m1 <- getPlotSetArray(
    tracks = tracks,
    features = grep('top', features, value = TRUE),
    refgenome = 'ce10',
    bin = 10L, xmin = 1000L, xmax = 1000L
)

m2 <- getPlotSetArray(
    tracks = grep('H3K4me3', tracks, value = TRUE, ignore.case = TRUE),
    features = grep('top|third|bot', features, value = TRUE),
    refgenome = 'ce10',
    bin = 10L, xmin = 1000L, xmax = 1000L
)


# Bottom figures c and d
ms <- MotifSetup()
ms$addBigWig(tracks[2])$addBigWig(tracks[3])$addBigWig(tracks[1])
## MotifSetup with 3 motifs/tracks.
ms$addMotif('CG', revcomp = FALSE, name = "CpG")
## MotifSetup with 4 motifs/tracks.
M <- getPlotSetArray(
    tracks = ms,
    features = features[5],
    refgenome = 'ce10',
    bin = 10L, xmin = 1000L, xmax = 1500L
)

Function for plotting c) and e)

plotTopBar <- function(M, titles=LETTERS, limits=NULL) {
    f = 37
    layout(
        matrix(c(1:100), 2, 4, byrow = FALSE), respect = TRUE,
        heights = c(.3, rep(1,5)), widths = c(1)
    )
    pp <- function() {
        par(mgp = c(0, 2.5, 0))
        axis(
            1, at = c(min(M[[1]]$all_ind), 0, max(M[[1]]$all_ind)),
            labels = c('-1kb', 'TSS', '+1.5kb'), cex.axis = f / 12
        )
        par(mgp = c(0, 1, 0))
    }
    plt <- function(p, ylim = NULL, ...) {
        for (i in 1:p$npaires()) {
            plot(
                p[i], ylim = ylim, keepratio = TRUE, legend = FALSE, ln.h = 0,
                cex.axis = f, cex.lab = f, ln.v = TRUE, xlab = '',
                cex.main = .01, panel.first = pp(), xaxt = "n", col = 'black', ...
            )
        }
    }
    
    par(mgp = c(3, 0, 0))
    
    plot.new(); title(xlab = titles[[1]], col.main = 'red', cex.lab = 6)
    plt(unlist(M[1,1]), ylim = limits[[1]], main = titles[[1]])
    
    par(mgp = c(3, 0, 0))
    
    plot.new(); title(xlab = titles[[2]], col.main = 'red', cex.lab = 6)
    plt(unlist(M[1,2]), ylim = limits[[2]], main = titles[[2]])
    
    par(mgp = c(3, 0, 0))
    
    plot.new(); title(xlab = titles[[3]], col.main = 'red', cex.lab = 6)
    plt(unlist(M[1,3]), ylim = limits[[3]], main = titles[[3]])
    
    par(mgp = c(3, 0, 0))
    
    plot.new(); title(xlab = titles[[4]], col.main = 'red', cex.lab = 6)
    plt(unlist(M[1,4]), ylim = limits[[4]], main = titles[[4]])
}

Figures

Plot A

plotAverage(
    m1, labels = colnames(m1$as.array())  %>% strsplit('_')  %>% sapply('[[', 1), 
    legend_pos = 'topleft', legend_ext = TRUE, legend_ext_pos = 'topright',
    cex.legend = 18, cex.main = 36, cex.lab = 28, cex.axis = 22,
    main = "Top expression quintile", xlab = "TSS", ylab = "Z-scored ChIP signal"
)

dev.print(pdf, file='fa2.pdf'); dev.print(png, file='fa2.png', height=8.27, width=11.7, res=72, units = "in");
## pdf 
##   2
## pdf 
##   2

Plot B

plotAverage(
    m2[3:1], labels = c("TSS top 20%", "TSS middle 20%", "TSS bottom 20%"), 
    legend_pos = 'topleft', legend_ext = TRUE, legend_ext_pos = 'topright',
    cex.legend = 17, cex.main = 36, cex.lab = 28, cex.axis = 22,
    main = "H3K4me3 ChIP-seq signal", xlab = "TSS", ylab = "Z-scored ChIP signal"
)

dev.print(pdf, file='fb2.pdf'); dev.print(png, file='fb2.png', height=8.27, width=11.7, res=72, units = "in");
## pdf 
##   2
## pdf 
##   2

Plot C

plotTopBar(
    M, M$as.array()  %>% colnames %>% strsplit('_') %>% sapply('[[', 1),
    limits = top_limits
)

dev.print(pdf, file='fc.pdf'); dev.print(png, file='fc.png', height=2*4, width=6*4, res=72, units = "in");
## pdf 
##   2
## pdf 
##   2

Plot D

cls <- plotHeatmap(
    M, labels = M$as.array()  %>% colnames %>% strsplit('_') %>% sapply('[[', 1),
    raster = TRUE, include = c(T,F,F,F), sortrows = 'decreasing', clusters = 3, 
    cex.legend = 18, cex.main = 36, cex.lab = 28, cex.axis = 22, colvec = colvec,
    o_min = c(-1, -2, -1, 0), o_max = c(3, 8, 4, 16)
)

dev.print(pdf, file='fd.pdf'); dev.print(png, file='fd.png', height=8.27, width=11.7, res=72, units = "in");
## quartz_off_screen 
##                 2
## quartz_off_screen 
##                 2

Plot E

export.bed(import.bed(features[5])[cls$ClusterID==1], 'flt.bed')
MF <- getPlotSetArray(
    tracks = ms,
    features = 'flt.bed',
    refgenome = 'ce10',
    bin = 10L, xmin = 1000L, xmax = 1500L
)
plotTopBar( 
    MF, MF$as.array()  %>% colnames %>% strsplit('_') %>% sapply('[[', 1),
    limits = top_limits
)

dev.print(pdf, file='fe.pdf'); dev.print(png, file='fe.png', height=2*4, width=6*4, res=72, units = "in");
## pdf 
##   2
## pdf 
##   2

Plot F

plotHeatmap(
    MF, labels = MF$as.array()  %>% colnames %>% strsplit('_') %>% sapply('[[', 1),
    raster = TRUE, include = c(F,T,T,F), sortrows = 'decreasing',
    clstmethod = 'ssom', ssomt1 = 2, ssomt2 = 2, clusters = 3,
    cex.legend = 18, cex.main = 36, cex.lab = 28, cex.axis = 22, colvec = colvec,
    o_min = c(NA, NA, NA, 4), o_max = c(NA, NA, NA, 16)
)

dev.print(pdf, file='ff.pdf'); dev.print(png, file='ff.png', height=8.27, width=11.7, res=72, units = "in");
## quartz_off_screen 
##                 2
## quartz_off_screen 
##                 2

Assembled figure (click for full page) + Printable - static HTML

Alternative figures

Plot D - reanked by CpG in preomoter region

plotHeatmap(
    M2, labels = M2$as.array()  %>% colnames %>% strsplit('_') %>% sapply('[[', 1),
    raster = TRUE, include = c(T,F,F,F), sortrows = 'none', clusters = 3, 
    cex.legend = 18, cex.main = 36, cex.lab = 28, cex.axis = 22, colvec = colvec,
    o_min = c(-1, -2, -1, 0), o_max = c(3, 8, 4, 16)
)

dev.print(pdf, file='fdalt.pdf')
## quartz_off_screen 
##                 2

Plot D - reanked by CpG w/o clustering

plotHeatmap(
    M2, labels = M2$as.array()  %>% colnames %>% strsplit('_') %>% sapply('[[', 1),
    raster = TRUE, include = c(T,F,F,F), sortrows = 'none', clusters = 0, 
    cex.legend = 18, cex.main = 36, cex.lab = 28, cex.axis = 22, colvec = colvec,
    o_min = c(-1, -2, -1, 0), o_max = c(3, 8, 4, 16)
)

dev.print(pdf, file='fdalt2.pdf')
## quartz_off_screen 
##                 2

Plot D - AATAAA TTS (endpoint) motif

## MotifSetup with 4 motifs/tracks.

## quartz_off_screen 
##                 2

Plot D - TTTCAG motif on 3' of exons

## MotifSetup with 4 motifs/tracks.

## quartz_off_screen 
##                 2